#import all libraries
import laspy as lp, sklearn as skl, numpy as np, matplotlib as mp, pandas as pd
from sklearn import cluster
from sklearn import preprocessing as prep
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt
import PIL
from PIL import ImageStat as istat
from PIL import ImageOps
#original dataset https://nrs.objectstore.gov.bc.ca/gdwuts/092/092g/2016/dsm/bc_092g025_3_4_2_xyes_8_utm10_20170601_dsm.laz
#renamed here for clarity
path_to_data = "F:/Data/Lidar/dtvan/dtvan.laz"
with lp.open(path_to_data) as las_file:
las_data = las_file.read()
# data loaded into a dataframe
df = pd.DataFrame({"X":list(las_data.x),"Y":list(las_data.y),"Z":list(las_data.z),"Intensity":las_data.intensity,"return_num":las_data.return_number,"totalreturns":las_data.num_returns,"classification":las_data.classification})
#full dataset displayed on a scatter plot
fig,ax = plt.subplots(figsize = (15,15))
ax.scatter(df['X'],df['Y'],zorder=1,alpha=0.25,c='black',s=0.001)
<matplotlib.collections.PathCollection at 0x120b4f0fdc0>
print("Total points:" + str(las_data.header.point_count))
Total points:20975490
print("Classes: " + str(set(list(df['classification']))))
Classes: {1, 2}
#data summary
df.describe()
| X | Y | Z | Intensity | return_num | totalreturns | classification | |
|---|---|---|---|---|---|---|---|
| count | 2.097549e+07 | 2.097549e+07 | 2.097549e+07 | 2.097549e+07 | 2.097549e+07 | 2.097549e+07 | 2.097549e+07 |
| mean | 4.917468e+05 | 5.458670e+06 | 3.049814e+01 | 8.719158e+01 | 1.089610e+00 | 1.179469e+00 | 1.096450e+00 |
| std | 5.265176e+02 | 3.876956e+02 | 2.793111e+01 | 8.505020e+01 | 3.125447e-01 | 4.422535e-01 | 2.952076e-01 |
| min | 4.909066e+05 | 5.458034e+06 | -9.260000e+00 | 1.000000e+00 | 1.000000e+00 | 1.000000e+00 | 1.000000e+00 |
| 25% | 4.912811e+05 | 5.458337e+06 | 1.176000e+01 | 4.700000e+01 | 1.000000e+00 | 1.000000e+00 | 1.000000e+00 |
| 50% | 4.917073e+05 | 5.458645e+06 | 2.478000e+01 | 6.700000e+01 | 1.000000e+00 | 1.000000e+00 | 1.000000e+00 |
| 75% | 4.921687e+05 | 5.458984e+06 | 3.776000e+01 | 1.020000e+02 | 1.000000e+00 | 1.000000e+00 | 1.000000e+00 |
| max | 4.927268e+05 | 5.459425e+06 | 2.330600e+02 | 4.064000e+03 | 5.000000e+00 | 5.000000e+00 | 2.000000e+00 |
# Define the area of interest, these values in meters are in the "NAD83 UTM 10" coordinate system of the provided dataset
# These are the upper and lower limits in meters to be used, these can be found using google maps or other free sites/software
# These were selected somewhat arbitrarily
aoi_extent = {'xmax':492349.0731766,'xmin':492043.6935073,'ymax':5458645.8660691,'ymin':5458340.4864470}
#query the dataframe to return only the points within the extent above and remove the points defined as ground as well
df_clip = df.query("X>{0}&X<{1}&Y>{2}&Y<{3}&classification==1".format(aoi_extent['xmin'],aoi_extent['xmax'],aoi_extent['ymin'],aoi_extent['ymax']))
df_clip
| X | Y | Z | Intensity | return_num | totalreturns | classification | |
|---|---|---|---|---|---|---|---|
| 4394229 | 492044.02 | 5458475.09 | 18.75 | 27 | 2 | 2 | 1 |
| 4394231 | 492044.00 | 5458475.08 | 18.74 | 26 | 2 | 2 | 1 |
| 4394233 | 492043.98 | 5458475.08 | 18.75 | 26 | 2 | 2 | 1 |
| 4394235 | 492043.94 | 5458475.08 | 18.78 | 32 | 2 | 2 | 1 |
| 4394237 | 492043.91 | 5458475.08 | 18.75 | 41 | 2 | 2 | 1 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 20825059 | 492345.70 | 5458600.09 | 15.56 | 153 | 1 | 1 | 1 |
| 20825060 | 492345.82 | 5458600.05 | 14.90 | 116 | 1 | 1 | 1 |
| 20825061 | 492346.11 | 5458600.05 | 14.84 | 139 | 1 | 1 | 1 |
| 20825062 | 492346.34 | 5458600.04 | 14.50 | 66 | 1 | 2 | 1 |
| 20825063 | 492346.61 | 5458600.04 | 14.41 | 29 | 1 | 2 | 1 |
789354 rows × 7 columns
#study area points displayed on a scatter plot
fig,ax = plt.subplots(figsize = (15,15))
ax.scatter(df_clip['X'],df_clip['Y'],zorder=1,alpha=0.5,c='black',s=0.01)
<matplotlib.collections.PathCollection at 0x120aaf1efa0>
#the height is used value to visualize in 3d, since the values are in meters for all 3 axes, it plots nicely as is
fig,ax = plt.subplots(figsize = (15,15)),plt.axes(projection='3d')
ax.scatter3D(df_clip['X'],df_clip['Y'],df_clip['Z'],c='black',s=0.01,alpha=0.5)
#from matplotlib import cm
#ax.plot_surface(df_clip['X'],df_clip['Y'],df_clip['Z'],cmap=cm.coolwarm,linewidth=0,antialiased=False)
<mpl_toolkits.mplot3d.art3d.Path3DCollection at 0x120ab891850>
# The Z values are an absolute elevation which is not as useful as height relative to the ground
df_ground = df.query("X>{0}&X<{1}&Y>{2}&Y<{3}&classification==2".format(aoi_extent['xmin'],aoi_extent['xmax'],aoi_extent['ymin'],aoi_extent['ymax']))
df_ground['Z']
# I need to create a ground image used to subtract from the elevation(Z) in order to get the height of the points relative to the ground
6437554 5.20
6441448 5.39
6441451 5.25
6441453 5.24
6441456 5.26
...
20822410 5.51
20822411 5.49
20822412 5.50
20822414 5.52
20822415 5.53
Name: Z, Length: 75058, dtype: float64
# Plotting the pre classified/labelled ground points for reference
fig,ax = plt.subplots(figsize = (10,10))
ax.scatter(df_ground['X'],df_ground['Y'],c='black',s=0.01,alpha=0.5)
<matplotlib.collections.PathCollection at 0x120ab8f49d0>
x_normal = (df_clip['X'] - min(df_clip['X']))/(max(df_clip['X']-min(df_clip['X'])))
y_normal = (df_clip['Y'] - min(df_clip['Y']))/(max(df_clip['Y']-min(df_clip['Y'])))
z_normal = (df_clip['Z'] - min(df_clip['Z']))/(max(df_clip['Z']-min(df_clip['Z'])))
i_normal = (df_clip['Intensity'] - min(df_clip['Intensity']))/(max(df_clip['Intensity']-min(df_clip['Intensity'])))
# new dataframe containing all the normalized values is created
df_normal = pd.DataFrame({'X':x_normal,'Y':y_normal,'Z':z_normal,'I':i_normal,'return_num':df_clip['return_num'],'total_returns':df_clip['totalreturns']})
df_normal
| X | Y | Z | I | return_num | total_returns | |
|---|---|---|---|---|---|---|
| 4394229 | 0.001048 | 0.440777 | 0.241716 | 0.006407 | 2 | 2 |
| 4394231 | 0.000982 | 0.440744 | 0.241629 | 0.006161 | 2 | 2 |
| 4394233 | 0.000917 | 0.440744 | 0.241716 | 0.006161 | 2 | 2 |
| 4394235 | 0.000786 | 0.440744 | 0.241974 | 0.007639 | 2 | 2 |
| 4394237 | 0.000688 | 0.440744 | 0.241716 | 0.009857 | 2 | 2 |
| ... | ... | ... | ... | ... | ... | ... |
| 20825059 | 0.988964 | 0.850116 | 0.214187 | 0.037457 | 1 | 1 |
| 20825060 | 0.989357 | 0.849985 | 0.208492 | 0.028339 | 1 | 1 |
| 20825061 | 0.990307 | 0.849985 | 0.207974 | 0.034007 | 1 | 1 |
| 20825062 | 0.991060 | 0.849953 | 0.205040 | 0.016018 | 1 | 2 |
| 20825063 | 0.991944 | 0.849953 | 0.204263 | 0.006900 | 1 | 2 |
789354 rows × 6 columns
# Plotting normalized looks the same but with the new scale
fig,ax = plt.subplots(figsize = (10,10))
ax.scatter(df_normal['X'],df_normal['Y'],c='black',s=0.01,alpha=0.5)
<matplotlib.collections.PathCollection at 0x120ab848e20>
image_path = "F:/Data/Lidar/images/BCVANC15_P9_aoiclip.tif"
img = PIL.Image.open(image_path)
rgb_img = img.convert("RGB")
rgb_img